#####
# NRS MODELLING
# 2010 - 2016

library(dplyr)
library(scales)
library(mgcv)

#### READ IN THE DATA ####

# Load the processed NRS records and take a quick look at the first rows.
nrsdata <- read.csv2("Pied Flycatcher Phenology/Interval estimation/Laying/data/NRS_processeddata.csv")
head(nrsdata)
# The third column is the one the models below refer to as `juliandate`.
names(nrsdata)[3] <- "juliandate"
#### MAIN MODEL YEARS 2013-2014 ####
# Fit one binomial GAM per year: a tensor-product smooth of date and northing
# plus one smooth per habitat covariate. Every yearly fit is kept in
# `mainmods`, keyed by year, so the 2013 model is no longer overwritten by the
# 2014 one.
# `mod` and `yr` still hold the last model and the last year after the loop;
# the bootstrap section further down reads `yr`.
mainmods <- list()
for (yr in 2013:2014) {
  mod <- gam(
    presence ~ te(juliandate, northing10, bs = "ts", k = 5) +
      s(altitude, k = 5) +
      s(suburban, k = 5, bs = "ts") +
      s(grassland, k = 5, bs = "ts") +
      s(broadleaf, k = 5, bs = "ts") +
      s(conifer, k = 5, bs = "ts"),
    data = nrsdata, subset = year == yr, family = binomial
  )
  mainmods[[as.character(yr)]] <- mod
}

#### BOOTSTRAPPING

#

# When does the peak in nesting occur?
#
# Returns the position of the first element of `x` that is not followed by a
# larger value (x[i + 1] <= x[i]), i.e. the first local maximum or the start of
# the first plateau. Steps whose difference is NA are skipped.
#
# x: numeric vector describing a curve, in order.
# Returns a single integer index into `x`, or NA_integer_ when the curve never
# stops rising (including inputs with fewer than two values).
firstpeak <- function(x) {
  notrising <- which(diff(x) <= 0)
  if (length(notrising) == 0L) {
    return(NA_integer_)
  }
  # which() is sorted ascending, so the first entry is the earliest peak;
  # [[ drops any name carried over from `x`
  notrising[[1L]]
}

# Bootstrap section: refit the model on resampled records and save, for each
# replicate, the fitted GAM, the first-peak date per northing band and the
# northing labels.
#
# The loop over years is disabled, so `yr` keeps the value it had before this
# section runs (the main-model loop earlier in the script leaves it at 2014).
# for (yr in 2013:2016)
# {

# NOTE(review): `b`, the number of bootstrap replicates, is not defined in the
# visible script; fail early with a clear message if it is missing.
if (!exists("b", mode = "numeric")) {
  stop("`b` (number of bootstrap replicates) must be set before bootstrapping",
       call. = FALSE)
}

speciesdata <- filter(nrsdata, year == yr)

# Quantities that do not depend on the bootstrap sample are computed once.
# Northing bands cover the full range of the year's data in steps of 10000.
northing10 <- seq(min(speciesdata$northing10), max(speciesdata$northing10), 10000)
# Median habitat composition used for every prediction.
medhabitat <- list(
  broadleaf = median(speciesdata$broadleaf),
  altitude = median(speciesdata$altitude, na.rm = TRUE),
  suburban = median(speciesdata$suburban),
  conifer = median(speciesdata$conifer),
  grassland = median(speciesdata$grassland)
)

for (boot in seq_len(b)) {
  # track time
  strt <- Sys.time()
  # one seed per replicate so each replicate can be reproduced on its own
  set.seed(boot)

  # folder for this replicate's output; the year folder is created as needed
  bootdir <- paste0(yr, "/", "nrs_boot_", boot)
  if (!dir.exists(bootdir)) {
    dir.create(bootdir, recursive = TRUE)
  }
  bootfolder <- paste0(bootdir, "/")

  # resample the records with replacement and refit the model.
  # The sample is drawn once per seed; an earlier draw that was immediately
  # overwritten has been dropped, so samples differ from that double draw.
  bootdata <- sample_frac(speciesdata, size = 1, replace = TRUE)
  mod <- gam(
    presence ~ te(juliandate, northing10, bs = "ts", k = 5) +
      s(altitude, k = 5) +
      s(suburban, k = 5, bs = "ts") +
      s(grassland, k = 5, bs = "ts") +
      s(broadleaf, k = 5, bs = "ts") +
      s(conifer, k = 5, bs = "ts"),
    data = bootdata, family = binomial
  )

  # dates span the resampled data only; northings span the whole year's data
  juliandate <- seq(min(bootdata$juliandate), max(bootdata$juliandate), by = .1)

  # predict over a median habitat composition
  predframe <- expand.grid(
    juliandate = juliandate, northing10 = northing10,
    broadleaf = medhabitat$broadleaf, altitude = medhabitat$altitude,
    suburban = medhabitat$suburban, conifer = medhabitat$conifer,
    grassland = medhabitat$grassland
  )
  predL <- predict(mod, newdata = predframe, type = "response", se.fit = TRUE)
  # dates x northings matrix of fitted values
  predmatrix <- as.matrix(xtabs(predL$fit ~ predframe$juliandate + predframe$northing10))

  # standardised matrix: northings x dates, each row divided by its maximum
  cmatrix <- t(predmatrix)
  maxvector <- apply(cmatrix, 1, FUN = max)
  cmatrix <- sweep(cmatrix, 1, maxvector, "/")

  ## getting the first peak for each northing's curve
  colno <- apply(cmatrix, 1, FUN = firstpeak)
  dayno <- as.numeric(colnames(cmatrix)[colno])

  ## blanking northings outside the range covered by this bootstrap sample
  northings <- as.numeric(rownames(cmatrix))
  maxnas <- which(northings > max(bootdata$northing10))
  minnas <- which(northings < min(bootdata$northing10))
  dayno[maxnas] <- NA
  dayno[minnas] <- NA

  # NOTE(review): these are positions along the date axis (columns of cmatrix)
  # but they are applied to `colnos`, which holds one northing per row. The
  # date grid is built from the bootdata range, so they look always empty.
  # Confirm whether `maxnas`/`minnas` were intended here.
  northnas <- which(as.numeric(colnames(cmatrix)) > max(bootdata$juliandate))
  southnas <- which(as.numeric(colnames(cmatrix)) < min(bootdata$juliandate))
  ## getting label for northings
  colnos <- northings
  colnos[northnas] <- NA
  colnos[southnas] <- NA

  ## saving the output files
  saveRDS(mod, file = paste0(bootfolder, "modboot_", boot, ".rds"))
  saveRDS(dayno, paste0(bootfolder, "daynos_", boot, ".rds"))
  saveRDS(colnos, paste0(bootfolder, "colnos_", boot, ".rds"))

  endx <- Sys.time()
  # force seconds: a plain difference switches to minutes or hours on long fits
  elapsed <- as.numeric(difftime(endx, strt, units = "secs"))
  print(paste(boot, "took", elapsed, "secs"))
}
# }
